# Computations
import numpy as np
import pandas as pd
# preprocessing
from sklearn.impute import SimpleImputer
import re
# Visualisation libraries
## matplotlib
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'
# and later releases dropped the old names, so try the current name first and
# fall back to the legacy one on older installations.
try:
    plt.style.use('seaborn-v0_8-whitegrid')
except OSError:
    plt.style.use('seaborn-whitegrid')
# Font sizes for axis labels / tick labels and a black default text colour.
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'text.color': 'k',
})
%matplotlib inline
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
%config InlineBackend.figure_format = 'retina'
## missingno
import missingno as msno
import warnings
warnings.filterwarnings("ignore")
In this article, we analyze and predict customer churn for Telco Customer Churn data.
| Columns | Description |
|---|---|
| customerID | Customer ID |
| gender | Whether the customer is a male or a female |
| SeniorCitizen | Whether the customer is a senior citizen or not (1, 0) |
| Partner | Whether the customer has a partner or not (Yes, No) |
| Dependents | Whether the customer has dependents or not (Yes, No) |
| tenure | Number of months the customer has stayed with the company |
| PhoneService | Whether the customer has a phone service or not (Yes, No) |
| MultipleLines | Whether the customer has multiple lines or not (Yes, No, No phone service) |
| InternetService | Customer’s internet service provider (DSL, Fiber optic, No) |
| OnlineSecurity | Whether the customer has online security or not (Yes, No, No internet service) |
| OnlineBackup | Whether the customer has an online backup or not (Yes, No, No internet service) |
| DeviceProtection | Whether the customer has device protection or not (Yes, No, No internet service) |
| TechSupport | Whether the customer has tech support or not (Yes, No, No internet service) |
| StreamingTV | Whether the customer has streaming TV or not (Yes, No, No internet service) |
| StreamingMovies | Whether the customer has streaming movies or not (Yes, No, No internet service) |
| Contract | The contract term of the customer (Month-to-month, One year, Two year) |
| PaperlessBilling | Whether the customer has paperless billing or not (Yes, No) |
| PaymentMethod | The customer’s payment method (Electronic check, Mailed check, Bank transfer (automatic), Credit card (automatic)) |
| MonthlyCharges | The amount charged to the customer monthly |
| TotalCharges | The total amount charged to the customer |
| Churn | Whether the customer churned or not (Yes or No) |
# Load the Telco customer churn CSV into the notebook-wide DataFrame `Data`
# (the path is relative to the notebook's working directory).
Data = pd.read_csv('telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv')
def Data_info(Inp, Only_NaN = False):
    """Summarise the dtype and missing-value count of every column of *Inp*.

    Parameters
    ----------
    Inp : pandas.DataFrame
        Frame to inspect.
    Only_NaN : bool, default False
        When true, keep only the columns that contain at least one NaN.

    Returns
    -------
    pandas.DataFrame
        Indexed by column name, with the columns 'Data Type',
        'Number of NaN Values' and 'Percentage' (NaN share of the rows,
        rounded to two decimals).
    """
    dtype_table = Inp.dtypes.to_frame(name='Data Type').sort_values(by=['Data Type'])
    nan_table = Inp.isnull().sum().to_frame(name = 'Number of NaN Values')
    summary = dtype_table.join(nan_table, how='outer')
    nan_share = summary['Number of NaN Values'] / Inp.shape[0]
    summary['Percentage'] = np.round(100 * nan_share, 2)
    if not Only_NaN:
        return summary
    # Restrict the report to columns that actually have missing values.
    return summary.loc[summary['Number of NaN Values'] > 0]
def dtypes_group(Inp):
    """Group the column names of *Inp* by their data type.

    Parameters
    ----------
    Inp : pandas.DataFrame
        Frame whose columns are to be grouped.

    Returns
    -------
    pandas.DataFrame
        One row per distinct dtype (the dtype object is the index label) and a
        single column 'Columns' holding the list of column names of that dtype.
    """
    # A stable sort keeps columns of the same dtype in their original order.
    Temp = Inp.dtypes.to_frame(name='Data Type').sort_values(by=['Data Type'], kind='mergesort')
    kinds = Temp['Data Type'].unique()

    # Collect the member columns of every dtype in a single pass.
    members = {}
    for column, kind in zip(Temp.index, Temp['Data Type']):
        members.setdefault(kind, []).append(column)

    # Fill an object array cell by cell: assigning a nested list through a
    # boolean-mask `.loc` lets pandas/NumPy try to broadcast the inner list
    # across cells instead of storing it as one value.
    cells = np.empty(len(kinds), dtype=object)
    for position, kind in enumerate(kinds):
        cells[position] = members[kind]
    return pd.Series(cells, index=kinds, name='Columns').to_frame()
# Draw missingno's per-column bar chart for `Data`; the returned object is
# bound to `_` so the notebook does not echo it.
_ = msno.bar(Data, figsize=(16,5), fontsize=14, log=False, color="#34495e")
def text_sep(txt):
    """Insert a space before each capital letter that directly follows a word character.

    For example 'SeniorCitizen' becomes 'Senior Citizen'. Matches do not
    overlap, so in a run of capitals only every other boundary is split.
    """
    boundary = re.compile(r"(\w)([A-Z])")
    return boundary.sub(r"\1 \2", txt)
def col_details(Col):
    """Print a coloured heading for column *Col* followed by its distinct values.

    Parameters
    ----------
    Col : str
        Name of a column of the notebook-level DataFrame `Data`.
    """
    # ANSI SGR escape sequences: black background, cyan foreground, normal
    # intensity and "reset all". These are the codes colorama exposes as
    # Back.BLACK, Fore.CYAN, Style.NORMAL and Style.RESET_ALL; the file never
    # imports colorama, so the raw sequences are used instead.
    black_background = '\033[40m'
    cyan_foreground = '\033[36m'
    normal_intensity = '\033[22m'
    reset_all = '\033[0m'
    print(black_background + cyan_foreground + normal_intensity + '%s:' % text_sep(Col))
    print(reset_all)
    # Convert to str first so numeric columns can be listed as well.
    print('%s' % ', '.join(map(str, Data[Col].unique())))
# Capitalise the two lower-case column names, then split every CamelCase
# column name into space-separated words (e.g. 'MonthlyCharges' -> 'Monthly Charges').
Data.rename(columns = dict(gender='Gender', tenure='Tenure'), inplace = True)
Data.columns = list(map(text_sep, Data.columns.tolist()))
# Churn by gender: counts and overall percentages per (Gender, Churn) pair,
# shown as a table, a horizontal bar chart and two donut charts.
Feature = 'Gender'
# A list is the documented way to request a named aggregation column from `agg`.
Temp = Data.groupby([Feature, 'Churn'])[Feature].agg(['count'])
# Share of every pair in the whole customer base, in percent.
Temp['Percentage'] = np.round(100 * Temp['count'] / Temp['count'].sum(), 2)
display(Temp)

C = ['aquamarine', 'steelblue']
SC = 'Navy'
Temp.reset_index(drop = False, inplace = True)

# Horizontal bars: one bar per churn answer, coloured by the feature value.
fig = px.bar(Temp, y='Churn', x='Percentage', orientation='h', color=Feature,
             text='Percentage', color_discrete_sequence=C, height=220)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  title={'text': 'Customer Churn by %s' % Feature,
                         'x': 0.5, 'y': 0.9,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(range=[0, 100], showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()

# Donuts: churned customers on the left, remaining customers on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
for position, churn_value in enumerate(['Yes', 'No'], start=1):
    subset = Temp.loc[Temp.Churn == churn_value]
    fig.add_trace(go.Pie(labels=subset[Feature].values,
                         values=subset['count'].values,
                         name='Churn [%s]' % churn_value, textfont=dict(size=16),
                         marker=dict(colors=C, line=dict(color='black', width=1))),
                  1, position)
fig.update_traces(hole=.6, marker_line_color=SC, marker_line_width=1, opacity=1)
fig.update_layout(legend_title=Feature, font=dict(size=14), legend=dict(orientation="v"),
                  annotations=[dict(text='Churned', x=0.18, y=0.5, font_size=16, showarrow=False),
                               dict(text='Remaining', x=0.845, y=0.5, font_size=16, showarrow=False)],
                  height=400)
fig.show()
del Feature, position, churn_value, subset
What stands out from the graph is that nearly 27% of the customers churned, and there is a balance between the two genders among churned customers.
# Churn by senior-citizen status: counts and overall percentages per
# (Senior Citizen, Churn) pair, shown as a table, a bar chart and two donuts.
Temp = Data.copy()
# Turn the 1/0 flag into readable labels; anything other than 1 becomes 'No'.
Temp['Senior Citizen'] = np.where(Temp['Senior Citizen'] == 1, 'Yes', 'No')
Feature = 'Senior Citizen'
# A list is the documented way to request a named aggregation column from `agg`.
Temp = Temp.groupby([Feature, 'Churn'])[Feature].agg(['count'])
# Share of every pair in the whole customer base, in percent.
Temp['Percentage'] = np.round(100 * Temp['count'] / Temp['count'].sum(), 2)
display(Temp)

C = ['greenyellow', 'seagreen']
SC = 'DarkGreen'
Temp.reset_index(drop = False, inplace = True)

# Horizontal bars: one bar per churn answer, coloured by the feature value.
fig = px.bar(Temp, y='Churn', x='Percentage', orientation='h', color=Feature,
             text='Percentage', color_discrete_sequence=C, height=220)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  title={'text': 'Customer Churn by %s' % Feature,
                         'x': 0.5, 'y': 0.9,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(range=[0, 100], showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()

# Donuts: churned customers on the left, remaining customers on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
for position, churn_value in enumerate(['Yes', 'No'], start=1):
    subset = Temp.loc[Temp.Churn == churn_value]
    fig.add_trace(go.Pie(labels=subset[Feature].values,
                         values=subset['count'].values,
                         name='Churn [%s]' % churn_value, textfont=dict(size=16),
                         marker=dict(colors=C, line=dict(color='black', width=1))),
                  1, position)
fig.update_traces(hole=.6, marker_line_color=SC, marker_line_width=1, opacity=1)
fig.update_layout(legend_title=Feature, font=dict(size=14), legend=dict(orientation="v"),
                  annotations=[dict(text='Churned', x=0.18, y=0.5, font_size=16, showarrow=False),
                               dict(text='Remaining', x=0.845, y=0.5, font_size=16, showarrow=False)],
                  height=400)
fig.show()
del Feature, position, churn_value, subset
It can be seen that only about 25.5% of the churned customers were senior citizens.
# Churn by senior-citizen status and gender: overall percentages per
# (Gender, Senior Citizen, Churn) triple, with churned customers in the top
# subplot and remaining customers in the bottom one.
# Copy the slice so the column assignment below works on an independent frame.
Temp = Data[['Gender','Senior Citizen','Churn']].copy()
# Turn the 1/0 flag into readable labels; anything other than 1 becomes 'No'.
Temp['Senior Citizen'] = np.where(Temp['Senior Citizen'] == 1, 'Yes', 'No')
Temp = Temp.groupby(['Gender','Senior Citizen','Churn'])['Churn'].agg(['count']).rename(columns = {'count':'Count'})
# Share of every triple in the whole customer base, in percent.
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
# Drop combinations whose row is entirely zero.
Temp = Temp[(Temp != 0).any(axis=1)]
display(Temp)
Temp.reset_index(drop = False, inplace = True)

# Figures
C = ['greenyellow', 'seagreen']
SC = 'DarkGreen'
fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.05, shared_xaxes=True,
                    subplot_titles=('Churned', 'Remaining'))

# Top: customers who churned.
fig1 = px.bar(Temp.loc[Temp.Churn == 'Yes'], y= 'Senior Citizen', x= 'Percentage', orientation='h',
              color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
              color_discrete_sequence = C, height= 200)
for trace in fig1['data']:
    fig.add_trace(trace, row=1, col=1)
fig.update_traces(marker_line_color= SC, marker_line_width=1, opacity=1, row=1, col=1)

# Bottom: customers who stayed. This panel previously filtered on 'Yes' as
# well, so the subplot titled 'Remaining' repeated the churned data.
fig2 = px.bar(Temp.loc[Temp.Churn == 'No'], y= 'Senior Citizen', x= 'Percentage', orientation='h',
              color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
              color_discrete_sequence = C, height= 200)
for trace in fig2['data']:
    fig.add_trace(trace, row=2, col=1)
# The legend entries of the top panel already cover both genders.
fig.update_traces(marker_line_color= SC, marker_line_width=1, opacity=1, showlegend = False, row=2, col=1)

# Update
fig.update_layout(height= 600, plot_bgcolor= 'white', legend_orientation='h',
                  title={'text': 'Customer Churn by Senior Citizen and Gender',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
# The remaining non-senior groups each exceed 20% of all customers, so the
# shared x-axis needs a wider range than the churned-only data required.
fig.update_xaxes(title_text='Percent', range=[0, 40], row=2, col=1)
fig.update_yaxes(title_text='Senior Citizen', row=1, col=1)
fig.update_yaxes(title_text='Senior Citizen', row=2, col=1)
fig.show()
del trace
In both genders, senior citizens account for a much smaller share of the churned customers than non-senior citizens.
# Churn by partner status: counts and overall percentages per
# (Partner, Churn) pair, shown as a table, a bar chart and two donut charts.
Feature = 'Partner'
# A list is the documented way to request a named aggregation column from `agg`.
Temp = Data.groupby([Feature, 'Churn'])[Feature].agg(['count'])
# Share of every pair in the whole customer base, in percent.
Temp['Percentage'] = np.round(100 * Temp['count'] / Temp['count'].sum(), 2)
display(Temp)

C = ['bisque', 'orange']
SC = 'DarkOrange'
Temp.reset_index(drop = False, inplace = True)

# Horizontal bars: one bar per churn answer, coloured by the feature value.
fig = px.bar(Temp, y='Churn', x='Percentage', orientation='h', color=Feature,
             text='Percentage', color_discrete_sequence=C, height=220)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  title={'text': 'Customer Churn by %s' % Feature,
                         'x': 0.5, 'y': 0.9,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(range=[0, 100], showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()

# Donuts: churned customers on the left, remaining customers on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
for position, churn_value in enumerate(['Yes', 'No'], start=1):
    subset = Temp.loc[Temp.Churn == churn_value]
    fig.add_trace(go.Pie(labels=subset[Feature].values,
                         values=subset['count'].values,
                         name='Churn [%s]' % churn_value, textfont=dict(size=16),
                         marker=dict(colors=C, line=dict(color='black', width=1))),
                  1, position)
fig.update_traces(hole=.6, marker_line_color=SC, marker_line_width=1, opacity=1)
fig.update_layout(legend_title=Feature, font=dict(size=14), legend=dict(orientation="v"),
                  annotations=[dict(text='Churned', x=0.18, y=0.5, font_size=16, showarrow=False),
                               dict(text='Remaining', x=0.845, y=0.5, font_size=16, showarrow=False)],
                  height=400)
fig.show()
del Feature, position, churn_value, subset
Over 64% of churned customers did not have any partners.
# Churn by dependents: counts and overall percentages per
# (Dependents, Churn) pair, shown as a table, a bar chart and two donut charts.
Feature = 'Dependents'
# A list is the documented way to request a named aggregation column from `agg`.
Temp = Data.groupby([Feature, 'Churn'])[Feature].agg(['count'])
# Share of every pair in the whole customer base, in percent.
Temp['Percentage'] = np.round(100 * Temp['count'] / Temp['count'].sum(), 2)
display(Temp)

C = ['pink', 'hotpink']
SC = 'DarkRed'
Temp.reset_index(drop = False, inplace = True)

# Horizontal bars: one bar per churn answer, coloured by the feature value.
fig = px.bar(Temp, y='Churn', x='Percentage', orientation='h', color=Feature,
             text='Percentage', color_discrete_sequence=C, height=220)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  title={'text': 'Customer Churn by %s' % Feature,
                         'x': 0.5, 'y': 0.9,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(range=[0, 100], showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()

# Donuts: churned customers on the left, remaining customers on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
for position, churn_value in enumerate(['Yes', 'No'], start=1):
    subset = Temp.loc[Temp.Churn == churn_value]
    fig.add_trace(go.Pie(labels=subset[Feature].values,
                         values=subset['count'].values,
                         name='Churn [%s]' % churn_value, textfont=dict(size=16),
                         marker=dict(colors=C, line=dict(color='black', width=1))),
                  1, position)
fig.update_traces(hole=.6, marker_line_color=SC, marker_line_width=1, opacity=1)
fig.update_layout(legend_title=Feature, font=dict(size=14), legend=dict(orientation="v"),
                  annotations=[dict(text='Churned', x=0.18, y=0.5, font_size=16, showarrow=False),
                               dict(text='Remaining', x=0.845, y=0.5, font_size=16, showarrow=False)],
                  height=400)
fig.show()
del Feature, position, churn_value, subset
Over 82 percent of churned customers did not have any dependents.
# Churn by tenure: overall percentages per (Tenure, Churn) pair, first as
# horizontal bars coloured by tenure, then as vertical bars per tenure value.
Feature = 'Tenure'
# A list is the documented way to request a named aggregation column from `agg`.
Temp = Data.groupby([Feature, 'Churn'])[Feature].agg(['count'])
# Share of every pair in the whole customer base, in percent.
Temp['Percentage'] = np.round(100 * Temp['count'] / Temp['count'].sum(), 2)
Temp.reset_index(drop = False, inplace = True)

# Overview: one bar per churn answer, segments shaded by the tenure value.
fig = px.bar(Temp, y='Churn', x='Percentage', orientation='h', color=Feature,
             text='Percentage', color_continuous_scale='ylgn', height=450)
fig.show()

# Detail: one bar per tenure value, coloured by churn answer.
C = ['violet', 'mediumorchid']
SC = 'Indigo'
fig = px.bar(Temp, x=Feature, y='Percentage', color='Churn',
             text='Percentage', color_discrete_sequence=C, height=500)
fig.update_traces(marker_line_color=SC, marker_line_width=1.2, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  title={'text': 'Customer Churn by %s' % Feature,
                         'x': 0.5, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(range=[0, 10], showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.show()
del Feature
Customers with a higher tenure tend to churn less.
# Churn by contract type: counts and overall percentages per
# (Contract, Churn) pair, shown as a table, a bar chart and two donut charts.
Feature = 'Contract'
# A list is the documented way to request a named aggregation column from `agg`.
Temp = Data.groupby([Feature, 'Churn'])[Feature].agg(['count'])
# Share of every pair in the whole customer base, in percent.
Temp['Percentage'] = np.round(100 * Temp['count'] / Temp['count'].sum(), 2)
display(Temp)

C = ['greenyellow', 'limeGreen','DarkGreen']
SC = 'DarkGreen'
Temp.reset_index(drop = False, inplace = True)

# Horizontal bars: one bar per churn answer, coloured by the feature value.
fig = px.bar(Temp, y='Churn', x='Percentage', orientation='h', color=Feature,
             text='Percentage', color_discrete_sequence=C, height=240)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  title={'text': 'Customer Churn by %s' % Feature,
                         'x': 0.5, 'y': 0.9,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(range=[0, 100], showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()

# Donuts: churned customers on the left, remaining customers on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
for position, churn_value in enumerate(['Yes', 'No'], start=1):
    subset = Temp.loc[Temp.Churn == churn_value]
    fig.add_trace(go.Pie(labels=subset[Feature].values,
                         values=subset['count'].values,
                         name='Churn [%s]' % churn_value, textfont=dict(size=16),
                         marker=dict(colors=C, line=dict(color='black', width=1))),
                  1, position)
fig.update_traces(hole=.6, marker_line_color=SC, marker_line_width=1, opacity=1)
fig.update_layout(legend_title=Feature, font=dict(size=14), legend=dict(orientation="v"),
                  annotations=[dict(text='Churned', x=0.18, y=0.5, font_size=16, showarrow=False),
                               dict(text='Remaining', x=0.845, y=0.5, font_size=16, showarrow=False)],
                  height=400)
fig.show()
del Feature, position, churn_value, subset
The majority of churned customers were on a month-to-month contract.
# Churn by payment method: counts and overall percentages per
# (Payment Method, Churn) pair, shown as a table, a bar chart and two donuts.
Feature = 'Payment Method'
# A list is the documented way to request a named aggregation column from `agg`.
Temp = Data.groupby([Feature, 'Churn'])[Feature].agg(['count'])
# Share of every pair in the whole customer base, in percent.
Temp['Percentage'] = np.round(100 * Temp['count'] / Temp['count'].sum(), 2)
display(Temp)

C = ['azure','paleturquoise','steelblue','MidnightBlue']
SC = 'Navy'
Temp.reset_index(drop = False, inplace = True)

# Horizontal bars: one bar per churn answer, coloured by the feature value.
fig = px.bar(Temp, y='Churn', x='Percentage', orientation='h', color=Feature,
             text='Percentage', color_discrete_sequence=C, height=260)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  title={'text': 'Customer Churn by %s' % Feature,
                         'x': 0.5, 'y': 0.9,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(range=[0, 100], showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()

# Donuts: churned customers on the left, remaining customers on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
for position, churn_value in enumerate(['Yes', 'No'], start=1):
    subset = Temp.loc[Temp.Churn == churn_value]
    fig.add_trace(go.Pie(labels=subset[Feature].values,
                         values=subset['count'].values,
                         name='Churn [%s]' % churn_value, textfont=dict(size=16),
                         marker=dict(colors=C, line=dict(color='black', width=1))),
                  1, position)
fig.update_traces(hole=.6, marker_line_color=SC, marker_line_width=1, opacity=1)
fig.update_layout(legend_title=Feature, font=dict(size=14), legend=dict(orientation="v"),
                  annotations=[dict(text='Churned', x=0.18, y=0.5, font_size=16, showarrow=False),
                               dict(text='Remaining', x=0.85, y=0.5, font_size=16, showarrow=False)],
                  height=400)
fig.show()
del Feature, position, churn_value, subset
Customers with an automatic payment method churned less.
# Churn by binned monthly charges (left) and binned tenure (right), drawn as
# stacked bars of overall percentages on a shared y-axis.
# Copy the slice so the column assignments below work on an independent frame.
Temp = Data[['Monthly Charges', 'Tenure', 'Churn']].copy()
# Coerce tenure to numbers and replace anything unparsable with the column mean.
Temp['Tenure'] = pd.to_numeric(Temp['Tenure'], errors='coerce')
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
# The imputer returns a single-column 2-D array; flatten it before storing.
Temp['Tenure'] = imp.fit_transform(Temp['Tenure'].values.reshape(-1,1)).ravel()

# NOTE: the intervals are right-closed (pandas' default), so a value equal to
# the lowest left edge (e.g. a tenure of exactly 0) falls outside every bin
# and becomes NaN.
charge_bins = pd.IntervalIndex.from_tuples([(18, 40), (40, 60), (60, 80),(80, 100), (100, 120)])
Temp['Monthly Charges'] = pd.cut(Temp['Monthly Charges'], charge_bins)
tenure_bins = pd.IntervalIndex.from_tuples([(0,6), (6, 12), (12, 18), (18, 24), (24, 36), (36, 60), (60, 80)])
Temp['Tenure'] = pd.cut(Temp['Tenure'], tenure_bins)
del charge_bins, tenure_bins

fig = make_subplots(rows=1, cols=2, shared_yaxes=True,
                    subplot_titles=('Customer Churn by Monthly Charges',
                                    'Customer Churn by Tenure'))
C = ['LightCoral', 'LimeGreen']
SC = 'Black'

# Left: share of all binned customers per (Monthly Charges bin, Churn) pair.
Temp0 = Temp.groupby(['Monthly Charges', 'Churn'])['Churn'].agg(['count'])
Temp0['Percentage'] = np.round(100 * Temp0['count'] / Temp0['count'].sum(), 2)
Temp0 = Temp0.reset_index()
# Plotly needs plain strings, not Interval objects, as category labels.
Temp0['Monthly Charges'] = Temp0['Monthly Charges'].astype(str)
fig1 = px.bar(Temp0, x= 'Monthly Charges', y= 'Percentage', color = 'Churn',
              text = 'Percentage', color_discrete_sequence= C)
for trace in fig1['data']:
    fig.add_trace(trace, row=1, col=1)
# The right-hand panel carries the legend, so hide the duplicate entries here.
fig.update_traces(marker_line_color= SC, marker_line_width=.8, opacity=1, showlegend = False, row=1, col=1)

# Right: share of all binned customers per (Tenure bin, Churn) pair.
Temp0 = Temp.groupby(['Tenure', 'Churn'])['Churn'].agg(['count'])
Temp0['Percentage'] = np.round(100 * Temp0['count'] / Temp0['count'].sum(), 2)
Temp0 = Temp0.reset_index()
Temp0['Tenure'] = Temp0['Tenure'].astype(str)
fig2 = px.bar(Temp0, x= 'Tenure', y= 'Percentage', color = 'Churn',
              text = 'Percentage', color_discrete_sequence= C)
fig2.update_traces(marker_line_color= SC, marker_line_width=1, opacity=1)
for trace in fig2['data']:
    fig.add_trace(trace, row=1, col=2)

# Updates
fig.update_yaxes(title_text='Percentage', range=[0, 30], row=1, col=1)
fig.update_layout(barmode='stack', plot_bgcolor= 'white')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.show()
del trace
The left plot shows that the churn rate increases as the monthly charges increase. However, customers with monthly charges of about $100 or more appear to churn less than customers with monthly charges between $80 and $100.
Moreover, we can see from the right plot that the churn rate decreases as tenure increases.
Alternatively, we can also demonstrate the above figure as follows
# Scatter plot of tenure against monthly charges, coloured by churn answer.
# Copy the slice so the column assignment below works on an independent frame.
Temp = Data[['Monthly Charges', 'Tenure', 'Churn']].copy()
# Coerce tenure to numbers and replace anything unparsable with the column mean.
Temp['Tenure'] = pd.to_numeric(Temp['Tenure'], errors='coerce')
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
# The imputer returns a single-column 2-D array; flatten it before storing.
Temp['Tenure'] = imp.fit_transform(Temp['Tenure'].values.reshape(-1,1)).ravel()

C = ['hotpink', 'steelblue']
SC = 'indigo'
fig = px.scatter(Temp, x= 'Monthly Charges', y= 'Tenure', color = 'Churn',
                 color_discrete_sequence= C, hover_data= Temp.columns, height= 600)
fig.update_traces(marker_line_color=SC, marker_line_width=0.5, opacity=1)
# A single title: the earlier interim title ('Customer Churn by Gender') did
# not describe this plot and was overwritten anyway.
fig.update_layout(uniformtext_minsize= 8, uniformtext_mode='hide', plot_bgcolor= 'white',
                  title={'text': 'Customer Churn by Monthly Charges and Tenure',
                         'x':0.5, 'y':0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(range=[17, 120], showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(range=[-1, 80], showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray',
                 zeroline=True, zerolinewidth=1, zerolinecolor='Lightgray')
fig.show()
# Box plots per gender, split by churn answer: monthly charges on the left,
# tenure on the right.
fig = make_subplots(rows=1, cols=2, subplot_titles=('by Monthly Charges and Gender', 'by Tenure and Gender'))
Colors = ['LightCoral', 'LimeGreen']
LC = 'Black'

# Left: distribution of monthly charges.
fig1 = px.box(Data, x='Gender', y='Monthly Charges', color='Churn',
              hover_data=['Gender','Monthly Charges','Churn'], color_discrete_sequence= Colors[::-1])
fig1.update_traces(quartilemethod='linear')
for trace in fig1['data']:
    fig.add_trace(trace, row=1, col=1)
# The right-hand panel carries the legend, so hide the duplicate entries here.
fig.update_traces(marker_line_color= LC, marker_line_width=.8, opacity=1, showlegend = False, row=1, col=1)

# Right: distribution of tenure.
fig2 = px.box(Data, x='Gender', y='Tenure', color='Churn',
              hover_data=['Gender','Tenure','Churn'], color_discrete_sequence= Colors[::-1])
fig2.update_traces(quartilemethod='linear')
for trace in fig2['data']:
    fig.add_trace(trace, row=1, col=2)

# Updates
# The y-axes show the raw quantities, not percentages, so label them as such.
fig.update_yaxes(title_text='Monthly Charges', range=[-2, 140], row=1, col=1)
fig.update_yaxes(title_text='Tenure', range=[-2, 80], row=1, col=2)
fig.update_layout(boxmode='group', plot_bgcolor= 'white',
                  title={'text': 'Customer Churn',
                         'x':0.5, 'y':0.88,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray',
                 zeroline=True, zerolinewidth=1, zerolinecolor='Lightgray')
fig.show()
del trace
For classification and modeling, please see the next files in the directory.